/* Purpose: This do-file brings in the cleaned-up ANES surveys and creates the variables
            necessary for the intergenerational mobility analysis. Sample restricted to 
            native-born men and women ages 30-50 who report father's occ.

Input: ANES_56to70cleaner.dta (using ANES_Cleaner file)
Output: ANES56to70_analysis.dta */

clear
set more off

cd "$Mydirectory1/1_DataSources/ANES/"
*-----------------------------------------------------------------------------*
*-----------------------------------------------------------------------------*

*************************
*** LOAD THE CLEANED ANES FILE AND RESTRICT THE SAMPLE
*************************

use "./output/ANES_56to70cleaner.dta"

	* Retain only the variables needed below (wildcards pick up whole families of variables)
	keep id_anes tag* R_* flag* state ageR sex race year employed union_hh unionR eduR ///
		fam_inc* stategrow head *married widowed divorced separated region urban ///
		topcoded bottomcoded yrs* placeborn placegrew fatheroccej motheroccej ///
		*_notworking headofhh* occHH_ej fatherforeign foreignborn wifeR weight ///
		occR* employedHH relate fatherocc

	* Drop the underscores from a few names
	rename occHH_ej occHHej
	rename occR_ej occRej
	rename union_hh unionH

** Sample restriction: ages 30-50, and foreignborn either 0 or system-missing
	keep if inrange(ageR, 30, 50)
	keep if (foreignborn==. | foreignborn==0)
	
	
*-----------------------------------------------------------------------------*
*-----------------------------------------------------------------------------*

*******************
**** FAMILY INCOME 
*******************

	* In 1968 family income is taken from fam_inc_10bins (all years have 9-10 bins)
	replace fam_inc = fam_inc_10bins if year==1968

/* Deflate fam_inc to 1950 dollars using the CPI: https://data.bls.gov/timeseries/CUUR0000SA0 */
	gen CPI1950 = 24.1
	gen fam_inc_real = .

	* Parallel lists: each survey year and the CPI level used to deflate it
	local svy_years 1956 1958 1960 1964 1966 1968 1970
	local cpi_levels 27.2 28.9 29.6 31.0 32.4 34.8 38.8
	local pos = 0
	foreach yr of local svy_years {
		local ++pos
		local cpi_yr : word `pos' of `cpi_levels'
		replace fam_inc_real = fam_inc * (CPI1950/`cpi_yr') if year==`yr'
	}
	label var fam_inc_real "Family income, in 1950 dollars"

	** Natural log of real family income
	gen lnfaminc = ln(fam_inc_real)
	label var lnfaminc "Logged family income, real"
	

*-----------------------------------------------------------------------------*
*-----------------------------------------------------------------------------*

* Fold a few "n.e.c." occupation codes that have no income-score mapping into neighboring codes
	foreach occvar of varlist occHHej occRej fatheroccej {
		replace `occvar' = 17 if `occvar'==19 // professional nec -> "other professional"
		replace `occvar' = 35 if `occvar'==37 // sales nec -> middle sales category
		replace `occvar' = 78 if `occvar'==79 // unskilled laborers nec -> other unskilled laborers
	}

* Indicator for father being a farm laborer or operator (codes 71 and 81).
* Left missing when fatheroccej is system-missing.
	gen fatherfarm = (fatheroccej==71 | fatheroccej==81) if fatheroccej!=.


*-----------------------------------------------------------------------------*
*-----------------------------------------------------------------------------*

*******************
** CLEAN OTHER VARIABLES
*******************

** Labels for a few basic variables
	label var state "State"
	label var race "Race"
	label var ageR "Respondent's age"

** Age (zero treated as missing) and age squared
	rename ageR age
	replace age = . if age==0
	gen agesq = age^2
	label var agesq "Respondent's age squared"

** Nativity labels
	label var fatherforeign "Father is foreign"
	label var foreignborn "R is foreignborn"

** Education: cross-tabulate the two measures, then build indicators and binned years
	tab yrsschool eduR 
	
	gen hs_ed = (eduR>=4) if !missing(eduR)
	gen coll_ed = (eduR>=6) if !missing(eduR)
	label var coll_ed "Coll educated"
	label var hs_ed "HS educated" 
	
	* eduR category -> years: 0 stays 0; categories 1..6 become 6, 8, ..., 16
	gen yrsschool_bin = .
	replace yrsschool_bin = 0 if eduR==0
	forvalues cat = 1/6 {
		replace yrsschool_bin = 4 + 2*`cat' if eduR==`cat'
	}
	tab yrsschool_bin, m
	
** Employment
	//In 1966 some of the women are household heads: where relate==1, use employedHH.
	replace employed = employedHH if relate==1 & year==1966
	* Assumption: in every other survey year, a woman with missing employment status is not working.
	replace employed = 0 if sex==2 & employed==. & inlist(year, 1956, 1958, 1960, 1964, 1968, 1970)
	
** Black indicator (race code 2)
	gen black = (race==2) if !missing(race)
	label var black "Black"

** Female indicator (sex code 2)
	gen female = (sex==2) if !missing(sex)
	label var female "Female" 
	
	
*-----------------------------------------------------------------------------*
*-----------------------------------------------------------------------------*

*******************
**** REGION VARIABLES
*******************

	* Keep the ANES region under its own name; the name "region" is reused further down
	rename region region_anes
	label var region_anes "Region, ANES classification"

	* South indicator: region_anes code 5 (solid south + tennessee + oklahoma); 0 otherwise, including when region_anes is missing
	gen south = (region_anes==5)
	label var south "South"

	* Complement of south
	gen nonsouth = 1 - south
	
	* Current Census division from the respondent's state code.
	* The state codes follow the ICPSR/ANES numbering implied by the lists below
	* (e.g. 40=VA, 49=TX, 54=TN). The ranges now cover every state in each block,
	* so respondents in NH/RI/VT (4-6), DE (11), WV (56) and WY (68) no longer get
	* a missing region. DE and WV go to the South Atlantic division, matching how
	* region4_born treats them (FIPS 10 and 54 are in the South).
	* NOTE(review): codes 4-6, 11, 56 and 68 are taken from the standard ICPSR state
	* code list -- confirm against the ANES codebook for these survey years.
	gen region=.
	replace region=1 if inrange(state, 1, 6) //new england (CT, ME, MA, NH, RI, VT)
	replace region=2 if inrange(state, 12, 14) //mid atlantic (NJ, NY, PA)
	replace region=3 if inrange(state, 21, 25) //east north central
	replace region=4 if inrange(state, 31, 37) //west north central
	replace region=5 if inlist(state, 11, 40, 43, 44, 47, 48, 52, 55, 56) //south atlantic (DE, VA, FL, GA, NC, SC, MD, DC, WV)
	replace region=6 if inlist(state, 41, 46, 51, 54) //east south central (AL, MS, KY, TN)
	replace region=7 if inlist(state, 42, 45, 49, 53) //west south central (AR, LA, TX, OK)
	replace region=8 if inrange(state, 61, 68) //mountain (through WY)
	replace region=9 if inrange(state, 71, 73) //pacific
	tab region, gen(region_) 

	* Collapse the nine divisions into the four Census regions
	gen region4=.
	replace region4=1 if inlist(region, 1, 2)
	replace region4=2 if inlist(region, 3, 4)
	replace region4=3 if inlist(region, 5, 6, 7)
	replace region4=4 if inlist(region, 8, 9)
	label var region4 "Current Census region R"
	
	* Born-in-south indicator (solid south + tennessee + oklahoma + general south):
	* placeborn codes 140-150, 153, 154, 158. Codes 100 and 999 are set to missing.
	gen bornsouth = (inrange(placeborn, 140, 150) | inlist(placeborn, 153, 154, 158)) if !inlist(placeborn, 100, 999)
	label var bornsouth "R born in solid south, TN, or OK"

	* Born in the south and currently living outside it; missing when bornsouth is missing
	gen migration = (bornsouth==1 & nonsouth==1) if bornsouth!=.
	label var migration "Great Migration"
	
	* Attach the FIPS code of the birth state using the ANES-to-FIPS crosswalk
	preserve
	use "../Crosswalks/ANES_FIPS_xwalk.dta", clear 
		keep state_anes state_fips
		rename state_anes placeborn
		rename state_fips fips
		label var placeborn "State of birth, ANES codes"
		
		tempfile state_crosswalk
		save `state_crosswalk'
	restore
	
	* keep(master match): without it, every crosswalk state that no respondent was
	* born in would be appended to the data as an otherwise-empty observation.
	merge m:1 placeborn using `state_crosswalk', keep(master match)
		tab placeborn if _merge==1 // birthplace codes with no crosswalk entry
		drop _merge
		
* Region born (four Census regions, keyed on birth-state FIPS)
	gen region4_born=.
	* Northeast: Connecticut, Maine, Massachusetts, New Hampshire, Rhode Island, Vermont, New Jersey, New York, Pennsylvania
	replace region4_born=1 if inlist(fips, 9, 23, 25, 33, 44, 50, 34, 36, 42)
	* Midwest: Illinois, Indiana, Michigan, Ohio, Wisconsin, Iowa, Kansas, Minnesota, Missouri, Nebraska, North Dakota, South Dakota
	replace region4_born=2 if inlist(fips, 17, 18, 26, 39, 55, 19, 20, 27, 29, 31, 38, 46)
	/* South: Delaware, District of Columbia, Florida, Georgia, Maryland, North Carolina, South Carolina, Virginia, West Virginia, Alabama,
			  Kentucky, Mississippi, Tennessee, Arkansas, Louisiana, Oklahoma, Texas */
	replace region4_born=3 if inlist(fips, 10, 11, 12, 13, 24, 37, 45, 51, 54, 1, 21, 28, 47, 5, 22, 40, 48)
	* West: Arizona, Colorado, Idaho, Montana, Nevada, New Mexico, Utah, Wyoming, California, Oregon, Washington, Alaska, Hawaii
	replace region4_born=4 if inlist(fips, 4, 8, 16, 30, 32, 35, 49, 56, 6, 41, 53, 2, 15)
	label var region4_born "Census region R born"
		
	rename fips bpl
	label var bpl "Birthplace, FIPS codes"
	
* Crosswalk stategrow to state FIPS, then create state_childhood
	gen state_anes=stategrow+100 //Convert stategrow back to its original format.
	* keep(master match): without it, crosswalk states in which no respondent grew up
	* would be appended to the data as otherwise-empty observations.
	merge m:1 state_anes using "../Crosswalks/ANES_FIPS_xwalk.dta", keep(master match)
	tab state_anes if _merge==1, m // childhood-state codes with no crosswalk entry
	drop _merge
	
	* Drop the merge key and unused crosswalk columns; rename the ones that are kept
	drop state_anes state_name_caps C state_name
	rename state_ab stateab_childhood
	rename state_fips state_childhood
	label var state_childhood "State where respondent grew up"

*region childhood
	* Four Census regions keyed on the FIPS code of the childhood state (state_childhood)
	gen region4_childhood = .
	* Northeast: Connecticut, Maine, Massachusetts, New Hampshire, Rhode Island, Vermont, New Jersey, New York, Pennsylvania
	replace region4_childhood = 1 if inlist(state_childhood, 9, 23, 25, 33, 44, 50, 34, 36, 42)
	* Midwest: Illinois, Indiana, Michigan, Ohio, Wisconsin, Iowa, Kansas, Minnesota, Missouri, Nebraska, North Dakota, South Dakota
	replace region4_childhood = 2 if inlist(state_childhood, 17, 18, 26, 39, 55, 19, 20, 27, 29, 31, 38, 46)
	/* South: Delaware, District of Columbia, Florida, Georgia, Maryland, North Carolina, South Carolina, Virginia, West Virginia, Alabama, Kentucky, Mississippi, Tennessee, Arkansas, Louisiana, Oklahoma, Texas */
	replace region4_childhood = 3 if inlist(state_childhood, 10, 11, 12, 13, 24, 37, 45, 51, 54, 1, 21, 28, 47, 5, 22, 40, 48)
	* West: Arizona, Colorado, Idaho, Montana, Nevada, New Mexico, Utah, Wyoming, California, Oregon, Washington (FIPS 2 and 15 are also included)
	replace region4_childhood = 4 if inlist(state_childhood, 4, 8, 16, 30, 32, 35, 49, 56, 6, 41, 53, 2, 15)

	label var region4_childhood "Region R grew up"

* Moved regions
	* Defined only when BOTH regions are known. In Stata a missing value compares
	* as unequal to any number, so without the region4<. condition a respondent
	* with unknown current region would be coded as having moved.
	gen moved_region = region4 != region4_born if region4<. & region4_born<.
	label var moved_region "Moved Census region"

*Label region vars
	label define region_l 1 "NORTHEAST" 2 "MIDWEST" 3 "SOUTH" 4 "WEST"
	label values region4 region4_born region4_childhood region_l
	tab region4_born, m
	tab region4_childhood, m
	tab region4, m
	

*-----------------------------------------------------------------------------*
*-----------------------------------------------------------------------------*

*******************
*** COHORT VARIABLES
*******************
 
** BIRTH COHORTS
	* Year of birth = survey year minus age
	gen dob = year - age
	
* Decade of birth
	tab dob, m
	gen decade = .
	forvalues start = 1900(10)1920 {
		replace decade = `start' if inrange(dob, `start', `start'+9)
	}
	* The last bin runs through 1940, so dob==1940 is grouped with the 1930s
	replace decade = 1930 if inrange(dob, 1930, 1940)
	label var decade "Decade of birth"
	
* One dummy per birth decade
	tab decade, gen(decade_)

*-----------------------------------------------------------------------------*
*-----------------------------------------------------------------------------*

	rename migration migr

	* Variables to be demeaned, stored in a global
	global institution_list "black migr south unionR unionH urban hs_ed eduR coll_ed"

** For each listed variable create <var>_dm = <var> minus its sample mean, reusing the variable label
	foreach v of global institution_list {
		local lbl : variable label `v'
		sum `v'
		gen `v'_dm = `v' - `r(mean)'
		label var `v'_dm "`lbl', demeaned"
	}


*------------------------------------------------------------------------------*

	* Remaining variable labels
	label var sex "Respondent sex"
	label var dob "Year of birth"	
	label var eduR "Respondent education, consistent categories"
	label var yrsschool "Years of school"
	label var yrsschool_bin "Years of school, binned"
	label var occRej "Respondent occupation, coarsened"
	label var fatherfarm "Father has farming occ."
	label var topcoded_son "Son's family income, top coded"
	label var bottomcoded_son "Son's family income, bottom coded"
	
***************	
**** SAVE
***************

	rename weight weight_anes
		
	* Tag repeated id_anes values and tabulate the tag
	duplicates tag id_anes, gen(dup)
	tab dup //no duplicates
	
	* Keep the original ID as year_id_anes; id_anes becomes a shorter running row number
	rename id_anes year_id_anes 
	label var year_id_anes "ID, including year"
	gen id_anes = _n
	
	compress
	save "./output/ANES56to70_analysis.dta", replace	
	
